# Preliminary ----

if (!require(pacman)) install.packages("pacman") # install pacman to install any required packages

pacman::p_load(MASS, tidyverse, cowplot, fixest, marginaleffects, modelsummary, ggbrace, lemon, hexbin, conflicted)

conflict_prefer("filter", "dplyr")
conflict_prefer("select", "dplyr")

options(modelsummary_factory_default = 'kableExtra') 
options(modelsummary_factory_latex = 'kableExtra') 

options(scipen = 999)

basedir <- normalizePath(file.path(here::here("..")))

setwd(basedir)

dir.create("outputs", showWarnings = FALSE)

fulldata_path <- "data/fulldf.rds"
besrounds_path <- "data/BESrounds.rds"
besip_path <- "data/BESIP_clean.rds"
groupappeals_path <- "data/groupappeals.rds"
press_path <- "data/press-appeals.rds"

# Preliminary data processing ----

# load data
sd <- read_rds(fulldata_path)

# truncate dataframe at date limit of dataset
sd <- sd |>
  filter(date<as.Date("2022-07-20"))

# create clustering variables
sd$dyad <- interaction(sd$party, sd$group, sep = " / ")
sd$cl_dyadwave <- interaction(sd$party, sd$group, sd$wave, sep = " / ")

# remove unobserved groups (rural and urban people)
maxnvsum <- sd %>%
  group_by(group) %>%
  summarise(maxnvsum = max(nvsum))

coveredgroups <- maxnvsum %>% 
  filter(maxnvsum!=0) %>%
  pull(group)

sd <- sd %>%
  filter(group %in% coveredgroups)

# survey dataset characteristics
length(unique(sd$id)) # 98109 unique respondents
nrow(sd) # 3482711 observations

# Group linkage trends (Figure 1) ----

#load BES data
fullbes <- read_rds(besrounds_path)

#rescale lookafter to match scale in analysis 
fullbes2 <- fullbes %>% 
  mutate(lookafter100=ifelse(lookafter<5,(100/3)*(lookafter-1),NA),
         month=floor_date(date,unit="months")) %>%  #add month
  filter(party!="Liberal Democrats") #too few libdem obs

#assign each wave to the most typical month
besmonths <- fullbes2 %>% 
  group_by(BES) %>% 
  count(month) %>% 
  arrange(-n) %>% 
  slice_head(n=1) %>% 
  ungroup() %>% 
  transmute(BES,wavemonth=month)

fullbes2 <- fullbes2 %>% 
  left_join(besmonths,by="BES")

#aggregate
aggbes <- fullbes2 %>% 
  group_by(party,group,wavemonth) %>% 
  summarise(meanlook=mean(lookafter100,na.rm=T)) %>% 
  ungroup() %>% 
  mutate(grouplabel=str_to_sentence(group)) %>% 
  rename(month=wavemonth)

#plot
ggplot(aggbes,aes(x=month,y=meanlook,group=party,color=party,shape=party)) +
  geom_line() +
  geom_point() +
  facet_wrap(vars(grouplabel)) +
  theme_bw() +
  theme(legend.position = "none") +
  labs(x="",y="Avg. group linkage") +
  scale_color_manual(values=c("#0054A6","#E4003B"))

ggsave("outputs/figure1a.pdf",width=6,height=6)

#load BESIP data, run the same operation
fullbesip <- read_rds(besip_path) %>% mutate(lookafter=as.numeric(labelled::remove_labels(lookafter)))

#rescale lookafter to match scale in analysis 
fullbesip2 <- fullbesip %>% 
  mutate(lookafter100=ifelse(lookafter<5,(100/3)*(lookafter-1),NA),
         month=floor_date(starttime,unit="months")) %>%  #add month
  filter(group %in% c("ethnic minorities","middle-class people","the unemployed","working class people")) %>% 
  dplyr::select(-wavemonth)

#assign each wave to the most typical month
besipmonths <- fullbesip2 %>% 
  group_by(wave) %>% 
  count(month) %>% 
  arrange(-n) %>% 
  slice_head(n=1) %>% 
  ungroup() %>% 
  transmute(wave,wavemonth=month)

fullbesip2 <- fullbesip2 %>% 
  left_join(besipmonths,by="wave")

#aggregate
aggbesip <- fullbesip2 %>% 
  group_by(party,group,wavemonth) %>% 
  summarise(meanlook=mean(lookafter100,na.rm=T)) %>% 
  ungroup() %>% 
  mutate(grouplabel=str_to_sentence(group)) %>% 
  rename(month=wavemonth)

#plot
besipplot <- ggplot(aggbesip, aes(x=month, y=meanlook, group=party, color=party, shape=party)) +
  geom_line() +
  geom_point() +
  facet_wrap(vars(grouplabel)) +
  theme_bw() +
  labs(x="", y="Avg. group linkage") +
  scale_color_manual(values=c("#0054A6", "#E4003B", "#E39E00", "#E1D700")) +
  theme(legend.title = element_blank(),
        legend.position = "right")

ggsave("outputs/figure1b.pdf", width=6, height=6)

# Comparison of parliamentary speeches and press releases (Figure D1 + D2; Table D1) ----

# read in data
press <- read_rds(press_path) %>% 
  mutate(nv=probpositive-probnegative)

hoc <- read_rds(groupappeals_path) %>% 
  mutate(nv=probpositive-probnegative)

# keep only temporal overlap

press <- press |>
  filter(date >= min(hoc$date) & date <= max(hoc$date))

hoc <- hoc |>
  filter(date >= min(press$date) & date <= max(press$date))

# number of mentions in PARTYRPESS (roughly 20k)
nrow(press) 

# calculate dyad-level NV sums at the month level

# summarize 'nv' by quarter, group, and party for 'hoc'
hoc_ts <- hoc %>%
  group_by(quarter = floor_date(date, "quarter"), group, party) %>%
  summarise(nv_hoc = sum(nv),
            count_hoc=n()) %>%
  mutate(nvav_hoc = nv_hoc/count_hoc) %>%
  ungroup()

# summarize 'nv' by quarter, group, and party for 'press'
press_ts <- press %>%
  group_by(quarter = floor_date(date, "quarter"), group, party) %>%
  summarise(nv_press = sum(nv),
            count_press = n()) %>%
  mutate(nvav_press = nv_press/count_press) %>%
  ungroup()

# combine them in one df
combined_ts <- merge(hoc_ts, press_ts, by = c("quarter", "group", "party"), all = TRUE)

# correlate
cor.test(combined_ts$nvav_hoc,combined_ts$nvav_press)

## Regression models (Table D1) ----
combined_ts$dyad <- interaction(combined_ts$party, combined_ts$group, sep = " / ")
unique(combined_ts$dyad) # clustering var, 60 levels

parlvpress_mods <- list(
  `Model 1` = feols(nvav_press ~ nvav_hoc, data=combined_ts),
  `Model 2` = feols(nvav_press ~ nvav_hoc | group, cluster = "group", data=combined_ts),
  `Model 3` = feols(nvav_press ~ nvav_hoc | group + party, cluster = "group", data=combined_ts),
  `Model 4` = feols(nvav_press ~ nvav_hoc | group + party, cluster = "dyad", data=combined_ts))

modelsummary(parlvpress_mods,
             output = "outputs/tableD1.tex",
             fmt = 3,
             booktabs = TRUE,
             estimate = "{estimate}{stars}",
             #             statistic = "({std.error})",
             coef_map = c("nvav_hoc" = "Net valence (average)",
                          "(Intercept)" = "Intercept"),
             gof_omit = c("AIC|BIC|R2|RMSE"))

## Hexbin plot (Figure D1) ----
ggplot(combined_ts, aes(x = nvav_hoc, y = nvav_press)) +
  geom_hex(bins = 100) +  # Hexbin plot
  #  geom_smooth(method = "loess", se = TRUE, color = "gray10", fill = "steelblue", alpha = 0.15) +  # LOESS curve
  geom_smooth(method="lm",alpha=.15,color="gray10",fill="steelblue") +
  #scale_fill_viridis_c(option = "cividis") +  
  labs(title = " ",
       x = "Parliamentary speeches",
       y = "Press releases") +
  coord_cartesian(xlim = c(-1, 1), ylim = c(-1, 1)) + 
  theme_bw() +
  theme(legend.position = "none",axis.text.y = element_text(color = "black"))

ggsave("outputs/figureD1.pdf",width=5,height=4)

## Mention counts bar chart (Figure D2) ----

# summarize 'n' in 'hoc' and 'press' per quarter-group-party triad
hoc_ts_y <- hoc %>%
  group_by(year = floor_date(date, "year"), group, party) %>%
  summarise(count_hoc=n()) %>%
  ungroup()

press_ts_y <- press %>%
  group_by(year = floor_date(date, "year"), group, party) %>%
  summarise(count_press = n()) %>%
  ungroup()

combined_ts_y <- merge(hoc_ts_y, press_ts_y, by = c("year", "group", "party"), all = TRUE)

# reshape to long format
combined_long <- combined_ts_y %>%
  mutate(observation = row_number()) %>%
  mutate(count_hoc = ifelse(is.na(count_hoc), 0, count_hoc),
         count_press = ifelse(is.na(count_press), 0, count_press)) %>%
  select(observation, count_hoc, count_press) %>%
  pivot_longer(cols = c(count_hoc, count_press), 
               names_to = "type", 
               values_to = "count")

# order by count_hoc in descending order
combined_long <- combined_long %>%
  mutate(observation = factor(observation, 
                              levels = observation[type == "count_hoc"][order(count[type == "count_hoc"], decreasing = TRUE)])) %>%
  arrange(observation)  # sort the rows based on the new ordered factor

# create chart
ggplot(combined_long, aes(x = observation, y = count, fill = type)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  labs(title = "",
       x = "Group-party-year observation",
       y = "Mention count") +
  scale_fill_manual(values = c("steelblue", "grey10"),
                    labels = c("Parliamentary speeches", "Press releases")) +
  theme_minimal() +
  theme(
    legend.title = element_blank(),
    legend.position = c(1, 0.95),  # Position legend at the upper right corner
    legend.justification = c(1, 1),  # Adjust justification so the legend correctly aligns
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.ticks.x = element_blank(),
    panel.background = element_rect(fill = "white", color = "white"),
    plot.background = element_rect(fill = "white", color = "white"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

summary(combined_long$count[combined_long$type=="count_hoc"]) # median: 67, mean: 379
summary(combined_long$count[combined_long$type=="count_press"]) # median: 3, mean: 32

ggsave("outputs/figureD2.pdf",width=8,height=4)

# Distribution of net valence (Figure 2b + E1) ----

## Distribution of NV (Figure 2b) ----

nvsd <- sd(sd$nvsum) # 173

#note we call this 'nvrt' to emphasize that this distributes net valence running tallies, not net valences per se (which we plot in fig 2a)

nvrthistplot1 <- ggplot(sd,aes(x=nvsum,y=after_stat(count))) +
  geom_histogram(fill="green3") +
  xlim(c(-36,750)) +
  theme_bw() +
  labs(x="Net valence running tally",y="Count") +
  scale_y_continuous(labels=function(x) format(x, big.mark = ",", scientific = FALSE),breaks=c(0,250e3,500e3,750e3,1000e3,1250e3))

xbraceseq <- seq(from=0,to=173,length.out=nrow(sd))
ybraceseq <- seq(from=2.5e5,to=2.7e5,length.out=nrow(sd))

(nvrthistplot2 <- nvrthistplot1 + 
    ggbrace::stat_brace(rotate = 0,outerstart = 2.3e5,width=80000,aes(x=xbraceseq,y=ybraceseq)) +
    annotate(geom="text",x=86,y=7.8e5,label="0 -> 173\n=\n1 SD\n~=\n2 net positive\nappeals per day",size=3))

nv95thpercentile <- quantile(sd$nvsum, probs = seq(0, 1, 0.05), na.rm=TRUE)[20] # 475

(nvrthistplot3 <- nvrthistplot2 +
    geom_vline(xintercept = nv95thpercentile,linetype="dashed",alpha=.6) +
    annotate(geom="text",x=nv95thpercentile+30,y=5e5,label="95th percentile",size=3,alpha=.7,angle=-90))

ggsave("outputs/figure2b.pdf",width=3.6,height=2.4)

## Distribution of NV by group (Figure E1) ----

nvrthistplot1_bygroup <- ggplot(filter(sd,!is.na(group)),aes(x=nvsum,y=after_stat(density))) +
  geom_histogram(fill="green3", binwidth = 20) +
  theme_bw() +
  labs(x="Net valence running tally",y="Prop.") +
  facet_wrap(~group,scales = "free_y",ncol=3)

ggsave("outputs/figureE1.pdf",width=6,height=7)

# Descriptives: average net valence (Figure 2a + 3 + 4 + F1; Table F1 + F2) ----

longsentsdf <- read_rds(groupappeals_path) %>% 
  mutate(nv=probpositive-probnegative)

nrow(longsentsdf) # number of sentences in our data

# share of appeals with below 0.5/negative valence: 
below05 <- longsentsdf |>
  filter(nv<0.5) |>
  nrow()

below0 <- longsentsdf |>
  filter(nv<0) |>
  nrow()

total <- nrow(longsentsdf)

below05/total # 27 pct.
below0/total # 8 pct.

# remove urban and rural people
longsentsdf <- longsentsdf %>% 
  filter(!group %in% c("urban people", "rural people"))

## Net valences histogram (Figure 2a) ----

ggplot(longsentsdf,aes(x=nv,y=after_stat(count))) +
  geom_histogram(fill="green3", bins = 30, center = 0) +
  theme_bw() +
  labs(x="Net valence",y="Count") +
  scale_y_continuous(labels=function(x) format(x, big.mark = ",", scientific = FALSE))

ggsave("outputs/figure2a.pdf",width=3.6,height=2.4)

## Avg. group net valence (Figure 3; Table F1) ----

#calculate avg. group valence
groupavgdf <- longsentsdf %>% 
  group_by(group) %>% 
  summarise(groupnetvalence=mean(nv)) %>% 
  ungroup()
  
#estimate nv by group without intercept
nvbygrouplm <- lm(nv~group-1,data=longsentsdf)

modelsummary(nvbygrouplm,
             output = "outputs/tableF1.tex",
             fmt = 3,
             booktabs = TRUE,
             estimate = "{estimate}{stars}",
             coef_map = c("groupatheists" = "Atheists",
                          "groupChristians" = "Christians",
                          "groupJews" = "Jews",
                          "groupMuslims" = "Muslims",
                          "groupworking class people" = "Working class people",
                          "groupmiddle-class people" = "Middle-class people",
                          "groupthe unemployed" = "The unemployed",
                          "groupethnic minorities" = "Ethnic minorities",
                          "groupmen" = "Men",
                          "groupwomen" = "Women",
                          "grouppeople from London" = "People from London",
                          "groupelderly people" = "Elderly people",
                          "groupyoung people" = "Young people"),
             gof_omit = c("AIC|BIC|R2|RMSE|Log.Lik."))

#convert to tidy df
nvbygrouplm_tidy <- broom::tidy(nvbygrouplm) %>% 
  mutate(grouplabel=str_remove(term,"group")) %>%
  #make first letter in group label uppercase
  mutate(grouplabel=str_to_sentence(grouplabel)) %>%
  #add 95 pct upper and lower bounds
  mutate(cilo95=estimate-1.96*std.error,cihi95=estimate+1.96*std.error) %>% 
  #add 90 pct bounds too
  mutate(cilo90=estimate-1.65*std.error,cihi90=estimate+1.65*std.error) %>%
  arrange(estimate) %>% 
  mutate(groupfac=fct_inorder(grouplabel))

#get % of mentions per group and join to regression df
groupfreqdf <- longsentsdf %>%
  count(group) %>%
  mutate(share = n / sum(n) * 100)

nvbygrouplm_tidy <- nvbygrouplm_tidy %>%
  left_join(groupfreqdf %>%
              mutate(grouplabel = str_to_sentence(group)), 
            by = "grouplabel") %>%
  mutate(share_label = paste0("(", round(share, 2), "%)"))

# create chart
ggplot(nvbygrouplm_tidy, aes(x = estimate, y = groupfac, color = estimate, fill = estimate)) + 
  geom_vline(xintercept = 0, linetype = "dashed", alpha = .5) +
  geom_errorbarh(aes(xmin = cilo95, xmax = cihi95), height = 0) +
  geom_errorbarh(aes(xmin = cilo90, xmax = cihi90), height = 0, linewidth = 1) +
  geom_point(size = 1.5, shape = 21) +
  geom_text(aes(x = cihi95 + 0.015, label = share_label), 
            hjust = 0, size = 3) +
  theme_minimal() +
  labs(title = "", x = "Avg. net valence", y = "") +
  scale_color_gradient2(low = "firebrick2", high = "forestgreen", mid = "grey", midpoint = .5) +
  scale_fill_gradient2(low = "firebrick2", high = "forestgreen", mid = "grey", midpoint = .5) +
  scale_x_continuous(breaks = c(0, .2, .4, .6, .8), expand = expansion(mult = c(0.05, 0.15))) +
  theme(legend.position = "none", axis.text.y = element_text(color = "black"))

ggsave("outputs/figure3.pdf",width=6,height=3) 

## Avg. net valence by group (Figure 4; Table F2) ----

# construct group df
conlabdiffdf <- longsentsdf %>% 
  filter(party %in% c("Labour Party","Conservative Party")) %>% 
  mutate(conspeaker=ifelse(party=="Conservative Party",1,0))

groups <- unique(conlabdiffdf$group)

# first, get the table in appendix
models <- list()

getconlabdiff_table <- function(grouplabel){
  dfx <- filter(conlabdiffdf, group == grouplabel)
  lm(nv ~ conspeaker, data = dfx)
}

for(g in groups) {
  models[[g]] <- getconlabdiff_table(g)
}

modelsummary(models,
             output = "outputs/tableF2.tex",
             fmt = 3,
             booktabs = TRUE,
             estimate = "{estimate}{stars}",
             coef_map = c("conspeaker" = "Conservative speaker",
                          "(Intercept)" = "Intercept"),
             gof_map = c("nobs"))

#function for estimating con lab diff for a given group
getconlabdiff_figure <- function(grouplabel){
  dfx <- filter(conlabdiffdf,group==grouplabel)
  lmx <- lm(nv~conspeaker,data=dfx)
  lmxdf <- broom::tidy(lmx) %>% 
    filter(term=="conspeaker") %>% 
    mutate(cilo95=estimate-1.96*std.error,cihi95=estimate+1.96*std.error,
           cilo90=estimate-1.65*std.error,cihi90=estimate+1.65*std.error,
           group=grouplabel)
}

clgaps <- map(groups,getconlabdiff_figure) %>% 
  bind_rows() %>% 
  arrange(estimate) %>% 
  mutate(groupfac=fct_inorder(group))

midwaypoint <- (max(clgaps$estimate)-min(clgaps$estimate))/-2

# Create  chart
ggplot(clgaps, aes(x = estimate, y = groupfac,color=estimate)) + 
  #  geom_vline(xintercept = 0,linetype="dashed",alpha=.5) +
  #  geom_segment(aes(xend = 0, yend = groupfac), size = 2) + 
  geom_errorbarh(aes(xmin=cilo95,xmax=cihi95),height=0) +
  geom_errorbarh(aes(xmin=cilo90,xmax=cihi90),height=0,linewidth=1) +
  geom_point() +
  theme_minimal() +
  labs(title = "", x = "More favored by Labour <---> More favored by Conservatives", y = "") +
  #  labs(title = "Net valence by group", x = "Avg. net valence", y = "") +
  scale_color_gradient2(low = "firebrick2", high = "#0087DC", mid = "grey", midpoint = midwaypoint) +
  theme(legend.position = "none",axis.text.y = element_text(color = "black"))

ggsave("outputs/figure4.pdf",width=6,height=3) 

## Dyad-level stability (Figure F1) -----

ls2 <- longsentsdf %>% 
  #create year chr variable
  mutate(yearchr = as.character(year(date))) %>% 
  #create party-group-year dyad variable
  mutate(dyadyear = interaction(party, group, yearchr, sep = " / "))

length(unique(ls2$dyadyear)) # 1481

#count proportion of each group in ls2
groupsizes <- ls2 %>%
  group_by(group) %>%
  summarise(n = n()) %>%
  mutate(share = n / sum(n) * 100) %>%
  arrange(desc(share))
groupsizes
#reduce ls2 to lab and con and 7 largest groups
ls2r <- ls2 %>% 
  filter(party %in% c("Labour Party", "Conservative Party")) %>%
  filter(group %in% groupsizes$group[1:7]) %>% 
  #create new party-group-year dyad variable
  mutate(dyadyear = interaction(party, group, yearchr, sep = " / "))

#regress nv on dyadyear with no intercept
nvbydyadyearlm <- feols(nv ~ dyadyear - 1, data = ls2r)

#tidy model object
nvbydyadyearlm_tidy <- broom::tidy(nvbydyadyearlm) %>% 
  mutate(dyadyear = str_remove(term, "dyadyear")) %>%
  #separate dyadyear into party, group, and year by "/"
  separate(dyadyear, into = c("party", "group", "year"), sep = " / ") %>%
  #make year numeric
  mutate(year = as.numeric(year)) %>%
  #add 95 pct upper and lower bounds
  mutate(cilo95 = estimate - 1.96 * std.error,
         cihi95 = estimate + 1.96 * std.error) %>% 
  #add 90 pct bounds too
  mutate(cilo90 = estimate - 1.65 * std.error,
         cihi90 = estimate + 1.65 * std.error) %>%
  arrange(estimate) 

#line plot of estimate by year, group and color by party, facet by group
nvbydyadyearlm_tidy %>%
  ggplot(aes(x = year, y = estimate, color = party, group = party)) +
  geom_line() +
  geom_errorbar(aes(ymin = cilo90, ymax = cihi90), width = 0.2) +
  geom_errorbar(aes(ymin = cilo95, ymax = cihi95), width = 0.1, alpha = 0.5) +
  facet_wrap(~group, scales = "free_y",ncol=2) +
  labs(x = "Year",
       y = "Net valence") +
  theme_minimal() +
  theme(legend.position = "none") +
  scale_color_manual(values=c("#0054A6","#E4003B"))

ggsave("outputs/figureF1.pdf",width=5,height=6)

# Main models (Table 1) ----

nv95thpercentile <- quantile(sd$nvsum, probs = seq(0, 1, 0.05), na.rm=TRUE)[20]

mainmods <- list(
  `Model 1` = feols(lookafter_na_100 ~ nvsum + nvn | group, cluster = "cl_dyadwave", data=sd),
  `Model 2` = feols(lookafter_na_100 ~ nvsum + nvn | group+party, cluster = "cl_dyadwave", data=sd),
  `Model 3` = feols(lookafter_na_100 ~ nvsum + nvn | group+party+wave, cluster = "cl_dyadwave", data=sd),
  `Model 4` = feols(lookafter_na_100 ~ nvsum + nvn | group+party+wave, cluster = "cl_dyadwave", data=sd,
                    subset = sd$nvsum<nv95thpercentile))

modelsummary(mainmods,
             output = "outputs/table1.tex",
             fmt = 3,
             booktabs = TRUE,
             estimate = "{estimate}{stars}",
             coef_map = c("nvsum" = "Net valence (sum)",
                          "nvn" = "Number of appeals",
                          "(Intercept)" = "Intercept"),
             gof_omit = c("AIC|BIC|R2|RMSE"))

# Prediction plot (Figure 5) ----

vizmod <- feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd)

nvmean <- mean(sd$nvsum) #104
nvmin <- min(sd$nvsum) #-35
nvmax <- max(sd$nvsum) #745
nvsd <- sd(sd$nvsum) #173

predictdf <- tibble(nvsum=seq(from=nvmin,to=nvmin+2*nvsd,length.out=50),nvn=mean(sd$nvn),
                    group="elderly people",id=1,wave=1,party="Labour")

predictionsdf <- predictions(vizmod,newdata = datagrid(nvsum=predictdf$nvsum), vcov = ~cl_dyadwave) %>% as_tibble()
ci90preds <- predictions(vizmod,newdata = datagrid(nvsum=predictdf$nvsum),conf_level = .90, vcov = ~cl_dyadwave) %>% as_tibble() %>% 
  transmute(conf.low90=conf.low,conf.high90=conf.high)
predictionsdf <- bind_cols(predictionsdf,ci90preds)

predictplot <- ggplot(predictionsdf,aes(x=nvsum,y=estimate,ymin=conf.low,ymax=conf.high)) +
  geom_hline(yintercept=0,linetype="dashed",alpha=.5) +
  geom_ribbon(fill="green3",alpha=.2) +
  geom_ribbon(fill="green3",alpha=.2,aes(ymin=conf.low90,ymax=conf.high90)) +
  geom_line(linewidth=1,color="midnightblue") +
  theme_bw() +
  labs(x="Net valence running tally",y="Group linkage") 

# increasing by one standard deviation (173):
predict0to173 <- tibble(nvsum=c(0,173),
                       group="elderly people",id=1,wave=1,party="Labour") # just picking a group and a party

# the percentage point increase of increasing NV by 1 standard deviation (173)
predictions(vizmod,newdata = datagrid(nvsum=predict0to173$nvsum), vcov = ~cl_dyadwave) %>% as_tibble()
50.5-38.3 # 12.2 points

# add annotations
(predictplot2 <- predictplot + stat_brace(rotate = 180,outerstart = 37,width=5,aes(x=seq(from=0,to=173,length.out=50))) +
    annotate(geom="text",x=88,y=30,label="1 SD",size=3))

(predictplot2 + 
    stat_brace(rotate = 90,outerstart = 173,width=6,aes(y=seq(from=38,to=50,length.out=50))) +
    annotate(geom="text",x=192,y=44.5,label="12.2pp",size=3))

ggsave("outputs/figure5.pdf",width=7,height=4)

# Heterogeneity across group types (Figure 6 + J1; Table J1 + J2) ----

# run models across group types
sd <- sd %>%
  mutate(grouptype = case_when(
    group %in% c("atheists", "Christians", "Jews", "Muslims") ~ "religion",
    group %in% c("elderly people", "young people") ~ "age",
    group %in% c("women", "men") ~ "gender",
    group %in% c("ethnic minorities") ~ "ethnicity",
    group %in% c("middle-class people", "working class people", "the unemployed") ~ "class",
    group %in% c("people from London") ~ "geography"
  ))

## by group type (Figure 6a)
hetmods1 <- list(
  `Religious` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$grouptype=="religion"),
  `Class` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$grouptype=="class"),
  `Age` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$grouptype=="age"),
  `Gender` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$grouptype=="gender")
)

## leaving out one group type at a time (Figure 6b)
hetmods2 <- list(
  `Atheists` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$group!="atheists"),
  `Christians` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$group!="Christians"),
  `Jews` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$group!="Jews"),
  `Muslims` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$group!="Muslims"),
  `Middle class` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$group!="middle-class people"),
  `Working class` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$group!="working class people"),
  `Unemployed` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$group!="the unemployed"),
  `Young` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$group!="young people"),
  `Elderly` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$group!="elderly people"),
  `Men` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$group!="men"),
  `Women` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$group!="women"),
  `Blacks and Asians` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$group!="ethnic minorities"),
  `Londoners` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$group!="people from London")
)

# run models leaving out one dyad at a time
dyads <- sd %>%
  distinct(party, group) %>%
  mutate(dyad_id = paste(party, group, sep = " | "))

hetmods_dyads <- map(
  dyads$dyad_id,
  function(dyad) {
    # split the dyad_id into party and group
    parts <- strsplit(dyad, " \\| ")[[1]]
    this_party <- parts[1]
    this_group <- parts[2]
    
    # estimate the model without this dyad
    feols(
      lookafter_na_100 ~ nvsum + nvn | group + wave + party,
      cluster = "cl_dyadwave",
      data = filter(sd, !(group == this_group & party == this_party))
    )
  }
)

names(hetmods_dyads) <- dyads$dyad_id

# construct Table J1 and J2

# Append model 4 from the mainmods list to hetmods lists
hetmods1 <- c(hetmods1[1:4], list(`All` = mainmods[[4]]))
hetmods2 <- c(hetmods2[1:13], list(`All` = mainmods[[4]]))

modelsummary(hetmods1,
             output = "outputs/tableJ1.tex",
             fmt = 3,
             booktabs = TRUE,
             estimate = "{estimate}{stars}",
             coef_map = c("nvsum" = "Net valence (sum)",
                          "nvn" = "Number of appeals",
                          "(Intercept)" = "Intercept"),
             gof_omit = c("AIC|BIC|R2|RMSE"))

modelsummary(hetmods2,
             output = "outputs/tableJ2.tex",
             fmt = 3,
             booktabs = TRUE,
             estimate = "{estimate}{stars}",
             coef_map = c("nvsum" = "Net valence (sum)",
                          "nvn" = "Number of appeals",
                          "(Intercept)" = "Intercept"),
             gof_omit = c("AIC|BIC|R2|RMSE"))

modelsummary(hetmods_dyads, # massive table, not printed in appendix
             output = "default",
             fmt = 3,
             booktabs = TRUE,
             estimate = "{estimate}{stars}",
             coef_map = c("nvsum" = "Net valence (sum)",
                          "nvn" = "Number of appeals",
                          "(Intercept)" = "Intercept"),
             gof_omit = c("AIC|BIC|R2|RMSE"))

#create data frame with broom::tidy applied to all models in hetmods1 list
hetmods1_tidy <- map_dfr(hetmods1, broom::tidy, .id = "model") %>% 
  #keep only nvsum term
  filter(term == "nvsum") %>%
  #make ordered factor variable by estimate
  arrange(estimate) %>%
  #make model factor variable ordered by estimate using fct_inorder
  mutate(modelfac = fct_inorder(model)) %>% 
  #create variables for 90 and 95 pct. confidence intervals
  mutate(conf.low90 = estimate - 1.645*std.error,
         conf.high90 = estimate + 1.645*std.error,
         conf.low95 = estimate - 1.96*std.error,
         conf.high95 = estimate + 1.96*std.error) %>% 
  #create dummy for whether group is All
  mutate(all = ifelse(model == "All", 1, 0))

# Figure 6a: create dot plot of estimate using hetmods1_tidy data with 90 and 95 pct. confidence intervals as error bars
hetmods1_tidy %>% 
  ggplot(aes(x = estimate, y = modelfac,color=factor(all))) +
  #add dashed vertical line at 0
  geom_vline(xintercept = 0, linetype = "dashed", alpha = .5) +
  geom_point() +
  geom_errorbarh(aes(xmin = conf.low90, xmax = conf.high90), height=0,linewidth=.8) +
  geom_errorbarh(aes(xmin = conf.low95, xmax = conf.high95), height=0,linewidth=.2) +
  labs(x = "Coefficient on net valence") +
  #manually set colors
  scale_color_manual(values = c("green3","darkgreen")) +
  ylab("Group type subset") +
  #remove legend
  theme_bw() +
  theme(legend.position = "none")

ggsave("outputs/figure6a.pdf",width=4,height=4)

#create data frame with broom::tidy applied to all models in hetmods2 list
hetmods2_tidy <- map_dfr(hetmods2, broom::tidy, .id = "model") %>% 
  #keep only nvsum term
  filter(term == "nvsum") %>%
  #make ordered factor variable by estimate
  arrange(estimate) %>%
  #make model factor variable ordered by estimate using fct_inorder
  mutate(modelfac = fct_inorder(model)) %>% 
  #create variables for 90 and 95 pct. confidence intervals
  mutate(conf.low90 = estimate - 1.645*std.error,
         conf.high90 = estimate + 1.645*std.error,
         conf.low95 = estimate - 1.96*std.error,
         conf.high95 = estimate + 1.96*std.error) %>% 
  #create dummy for whether group is All
  mutate(all = ifelse(model == "All", 1, 0))

# Figure 6b: create dot plot of estimate using hetmods2_tidy data with 90 and 95 pct. confidence intervals as error bars
hetmods2_tidy %>% 
  ggplot(aes(x = estimate, y = modelfac,color=factor(all))) +
  #add dashed vertical line at 0
  geom_vline(xintercept = 0, linetype = "dashed", alpha = .5) +
  geom_point() +
  geom_errorbarh(aes(xmin = conf.low90, xmax = conf.high90), height=0,linewidth=.8) +
  geom_errorbarh(aes(xmin = conf.low95, xmax = conf.high95), height=0,linewidth=.2) +
  labs(x = "Coefficient on net valence") +
  #manually set colors
  scale_color_manual(values = c("green3","darkgreen")) +
  ylab("Group omitted") +
  #remove legend
  theme_bw() +
  theme(legend.position = "none")

ggsave("outputs/figure6b.pdf",width=4,height=4)

#create data frame with broom::tidy applied to all models in hetmods_dyads list
hetmods_dyads_tidy <- map_dfr(hetmods_dyads, broom::tidy, .id = "dyad") %>% 
  filter(term == "nvsum") %>%
  arrange(estimate) %>%
  mutate(dyad_fac = fct_inorder(dyad)) %>%
  mutate(
    conf.low90 = estimate - 1.645 * std.error,
    conf.high90 = estimate + 1.645 * std.error,
    conf.low95 = estimate - 1.96 * std.error,
    conf.high95 = estimate + 1.96 * std.error
  )

# Figure J1: create dot plot of estimate using hetmods_dyads_tidy data with 90 and 95 pct. confidence intervals as error bars
ggplot(hetmods_dyads_tidy, aes(x = estimate, y = dyad_fac)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = .5) +
  geom_point(color = "green3") +
  geom_errorbarh(aes(xmin = conf.low90, xmax = conf.high90), height = 0, linewidth = .8, color = "green3") +
  geom_errorbarh(aes(xmin = conf.low95, xmax = conf.high95), height = 0, linewidth = .2, color = "green3") +
  labs(x = "Coefficient on net valence", y = "Omitted party-group dyad") +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.y = element_text(size = 8)
  )

ggsave("outputs/figureJ1.pdf",width=6,height=4)

# Heterogeneity by policy mentions (Figure 7a; Table K1) ----

policymods <- list(
  `All appeals` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd),
  `Appeals without policy` = feols(lookafter_na_100 ~ nvsum_nopolicy + nvn_nopolicy | group+wave+party, cluster = "cl_dyadwave", data=sd),
  `Appeals with policy` = feols(lookafter_na_100 ~ nvsum_policy + nvn_policy | group+wave+party, cluster = "cl_dyadwave", data=sd)
)

# Table K1
modelsummary(policymods,
             output = "outputs/tableK1.tex",
             fmt = 3,
             booktabs = TRUE,
             estimate = "{estimate}{stars}",
             #             statistic = "({std.error})",
             coef_map = c("nvsum" = "Net valence (sum)",
                          "nvn" = "Number of appeals",
                          "nvsum_policy" = "Net valence (sum)",
                          "nvn_policy" = "Number of appeals",
                          "nvsum_nopolicy" = "Net valence (sum)",
                          "nvn_nopolicy" = "Number of appeals",
                          "(Intercept)" = "Intercept"),
             gof_omit = c("AIC|BIC|R2|RMSE"))

# create table for plotting Figure 7a
modelsummary(policymods,
             output = "markdown",
             fmt = 3,
             booktabs = TRUE,
             estimate = "{estimate}{stars}",
             #             statistic = "({std.error})",
             coef_map = c("nvsum" = "Net valence (sum)",
                          "nvn" = "Number of appeals",
                          "nvsum_policy" = "Net valence (sum)",
                          "nvn_policy" = "Number of appeals",
                          "nvsum_nopolicy" = "Net valence (sum)",
                          "nvn_nopolicy" = "Number of appeals",
                          "(Intercept)" = "Intercept"),
             gof_omit = c("AIC|BIC|R2|RMSE"))

#create data frame with broom::tidy applied to all models in policymods list
nopolicy_tidy <- map_dfr(policymods, tidy, .id = "model") %>% 
  #keep only nvsum term
  filter(str_detect(term,"nvsum")) %>%
  #make ordered factor variable by estimate
  arrange(estimate) %>%
  #make model factor variable ordered by estimate using fct_inorder
  mutate(modelfac = fct_inorder(model)) %>% 
  #create variables for 90 and 95 pct. confidence intervals
  mutate(conf.low90 = estimate - 1.645*std.error,
         conf.high90 = estimate + 1.645*std.error,
         conf.low95 = estimate - 1.96*std.error,
         conf.high95 = estimate + 1.96*std.error) %>% 
  #create dummy for whether group is All
  mutate(all = ifelse(model == "All appeals", 1, 0))

# Figure 7a: create dot plot of estimate using policy_hetplot data with 90 and 95 pct. confidence intervals as error bars
policy_hetplot <- nopolicy_tidy %>% 
  ggplot(aes(x = estimate, y = modelfac,color=factor(all))) +
  #add dashed vertical line at 0
  geom_vline(xintercept = 0, linetype = "dashed", alpha = .5) +
  geom_point() +
  geom_errorbarh(aes(xmin = conf.low90, xmax = conf.high90), height=0,linewidth=.5) +
  geom_errorbarh(aes(xmin = conf.low95, xmax = conf.high95), height=0,linewidth=.2) +
  labs(x = "Net valence estimate", y = "Model") +
  #manually set colors
  scale_color_manual(values = c("green2","green4")) +
  ylab("Appeal type") +
  #remove legend
  theme_bw() +
  theme(legend.position = "none")

policy_hetplot

ggsave("outputs/figure7a.pdf",width=4,height=2)

# Heterogeneity by recent news consumption (Figure 7b; Table N1) ----

# construct binary news consumption variables
sd <- sd |>
  mutate(TV = ifelse(infoTV>1,1,ifelse(is.na(infoTV),NA_real_,0)),
         Paper = ifelse(infoPaper>1,1,ifelse(is.na(infoPaper),NA_real_,0)),
         Radio = ifelse(infoRadio>1,1,ifelse(is.na(infoRadio),NA_real_,0)),
         Internet = ifelse(infoInternet>1,1,ifelse(is.na(infoInternet),NA_real_,0)),
         People = ifelse(infoPeople>1,1,ifelse(is.na(infoInternet),NA_real_,0)),
         
         anynews = ifelse(TV+Paper+Radio==0,0,1)
  )

news_moderation <- list(
  `TV` = feols(lookafter_na_100 ~ nvsum*TV + nvn | group+party+wave+id, cluster = "cl_dyadwave", data=sd),
  `Newspapers` = feols(lookafter_na_100 ~ nvsum*Paper + nvn | group+party+wave+id, cluster = "cl_dyadwave", data=sd),
  `Radio` = feols(lookafter_na_100 ~ nvsum*Radio + nvn | group+party+wave+id, cluster = "cl_dyadwave", data=sd),
  `Any news source` = feols(lookafter_na_100 ~ nvsum*anynews + nvn | group+party+wave+id, cluster = "cl_dyadwave", data=sd))

sort(unique(sd$wave[complete.cases(sd[c("lookafter_na_100", "nvsum", "nvn", "TV")])])) # 4 waves (12,15,19,20)

# Table N1
modelsummary(news_moderation,
             output = "outputs/tableN1.tex",
             fmt = 3,
             booktabs = TRUE,
             estimate = "{estimate}{stars}",
             coef_map = c("nvsum" = "Net valence (sum)",
                          "nvn" = "Number of appeals",
                          "TV" = "Consume news",
                          "nvsum:TV" = "Net valence X consume news",
                          "Paper" = "Consume news",
                          "nvsum:Paper" = "Net valence X consume news",
                          "Radio" = "Consume news",
                          "nvsum:Radio" = "Net valence X consume news",
                          "anynews" = "Consume news",
                          "nvsum:anynews" = "Net valence X consume news",
                          "(Intercept)" = "Intercept"),
             gof_omit = c("AIC|BIC|R2|RMSE"))

# create data frame with broom::tidy applied to all models in news_moderation list
news_moderation_tidy <- map_dfr(news_moderation, broom::tidy, .id = "model") %>% 
  filter(str_detect(term, ":")) %>%
  arrange(estimate) %>%
  mutate(modelfac = fct_inorder(model)) %>% 
  mutate(conf.low90 = estimate - 1.645*std.error,
         conf.high90 = estimate + 1.645*std.error,
         conf.low95 = estimate - 1.96*std.error,
         conf.high95 = estimate + 1.96*std.error) %>% 
  mutate(mainmodel = ifelse(model == "Any news source", 1, 0))

# Figure 7b: create dot plot of interaction estimates
news_hetplot <- news_moderation_tidy %>% 
  ggplot(aes(x = estimate, y = modelfac, color = factor(mainmodel))) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = .5) +
  geom_point(size = 2) +
  geom_errorbarh(aes(xmin = conf.low90, xmax = conf.high90), height = 0, linewidth = .5) +
  geom_errorbarh(aes(xmin = conf.low95, xmax = conf.high95), height = 0, linewidth = .2) +
  labs(x = "NV by news consumption interaction", y = "News source") +
  scale_color_manual(values = c("green2", "green4")) +
  theme_bw() +
  theme(legend.position = "none")

ggsave("outputs/figure7b.pdf",width=4,height=2)

# Accounting for changing party policy (Table M2) ----

partyposmods <- list(
  `Class` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$grouptype=="class"),
  `Class` = feols(lookafter_na_100 ~ nvsum + nvn + partypos_leftright + partypos_welfare | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$grouptype=="class"),
  `Women` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$group=="women"),
  `Women` = feols(lookafter_na_100 ~ nvsum + nvn + partypos_women | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$group=="women"),
  `Ethnic minorities` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$grouptype=="ethnicity"),
  `Ethnic minorities` = feols(lookafter_na_100 ~ nvsum + nvn + partypos_cultdom | group+wave+party, cluster = "cl_dyadwave", data=sd, subset = sd$grouptype=="ethnicity"),
  `All` = feols(lookafter_na_100 ~ nvsum + nvn | group+wave+party, cluster = "cl_dyadwave", data=sd),
  `All` = feols(lookafter_na_100 ~ nvsum + nvn + partypos_leftright + partypos_welfare + partypos_cultdom + partypos_women | group+wave+party, cluster = "cl_dyadwave", data=sd)
)

modelsummary(partyposmods,
             output = "outputs/tableM2.tex",
             fmt = 3,
             booktabs = TRUE,
             estimate = "{estimate}{stars}",
             coef_map = c("nvsum" = "Net valence (sum)",
                          "nvn" = "Number of appeals",
                          "partypos_leftright" = "Party: economy",
                          "partypos_welfare" = "Party: welfare", 
                          "partypos_women" = "Party: women",
                          "partypos_cultdom" = "Party: cult. sup.", 
                          "(Intercept)" = "Intercept"),
             gof_omit = c("AIC|BIC|R2|RMSE"))

# Alternative model specifications (Table G1) ----

# generate lookafter_na_100 as an ordered factor for ordered logit
sd$lookafter_na_100_factor <- ordered(sd$lookafter_na_100)

# generate average net valence variable to interact with 'nvn' for alternative model specification
sd <- sd |>
  mutate(nvavg = ifelse(nvn>0, nvsum/nvn, 0))

# rename 'group' variable to avoid conflicts with outputs of 'marginaleffects' for Model 7
sd$group_ <- sd$group

altmods <- list(
  `Model 1` = feols(lookafter_na_100 ~ nvsum + nvn, cluster = "cl_dyadwave", data=sd),
  `Model 2` = feols(lookafter_na_100 ~ nvn, cluster = "cl_dyadwave", data=sd),
  `Model 3` = feols(lookafter_na_100 ~ nvn | group, cluster = "cl_dyadwave", data=sd),
  `Model 4` = feols(lookafter_na_100 ~ nvn | group+wave, cluster = "cl_dyadwave", data=sd),
  `Model 5` = feols(lookafter_na_100 ~ nvn | group+wave+party, cluster = "cl_dyadwave", data=sd),
  `Model 6` = polr(lookafter_na_100_factor ~ nvsum + nvn + factor(group) + factor(party) + factor(wave), data = sd, Hess = TRUE),
  `Model 7` = feols(lookafter_na_100 ~ nvn*nvavg | group_+party+wave, cluster = "cl_dyadwave", data=sd)
  )

modelsummary(altmods,
             output = "outputs/tableG1.tex",
             fmt = 3,
             booktabs = TRUE,
             estimate = "{estimate}{stars}",
             #             statistic = "({std.error})",
             coef_map = c("nvsum" = "Net valence (sum)",
                          "nvn" = "Number of appeals",
                          "nvavg" = "Average valence",
                          "nvn:nvavg" = "Average valence X number of appeals",
                          "(Intercept)" = "Intercept"),
             gof_omit = c("AIC|BIC|R2|RMSE"))

# Panel models (Table H1) ----

sd_panelonly <- sd |>
  filter(BES=="BESIP")

panelmods <- list(
  `Model 1` = feols(lookafter_na_100 ~ nvsum + nvn | group, cluster = "cl_dyadwave", data=sd_panelonly),
  `Model 2` = feols(lookafter_na_100 ~ nvsum + nvn | group+id, cluster = "cl_dyadwave", data=sd_panelonly),
  `Model 3` = feols(lookafter_na_100 ~ nvsum + nvn | group+id+wave, cluster = "cl_dyadwave", data=sd_panelonly),
  `Model 4` = feols(lookafter_na_100 ~ nvsum + nvn | group+id+wave+party, cluster = "cl_dyadwave", data=sd_panelonly))

modelsummary(panelmods,
             output = "outputs/tableH1.tex",
             fmt = 3,
             booktabs = TRUE,
             estimate = "{estimate}{stars}",
             #             statistic = "({std.error})",
             coef_map = c("nvsum" = "Net valence (sum)",
                          "nvn" = "Number of appeals",
                          "(Intercept)" = "Intercept"),
             gof_omit = c("AIC|BIC|R2|RMSE"))

# Varying exposure windows (Figure I1; Table I1) ----

windowmods <- list(
  `60 days` = feols(lookafter_na_100 ~ nvsum60 + nvn60 | group+party+wave, cluster = "cl_dyadwave", data=sd),
  `75 days` = feols(lookafter_na_100 ~ nvsum75 + nvn75 | group+party+wave, cluster = "cl_dyadwave", data=sd),
  `90 days` = feols(lookafter_na_100 ~ nvsum + nvn | group+party+wave, cluster = "cl_dyadwave", data=sd),
  `105 days` = feols(lookafter_na_100 ~ nvsum105 + nvn105 | group+party+wave, cluster = "cl_dyadwave", data=sd),
  `120 days` = feols(lookafter_na_100 ~ nvsum120 + nvn120 | group+party+wave, cluster = "cl_dyadwave", data=sd))

# create latex table
modelsummary(windowmods,
             output = "outputs/tableI1.tex",
             fmt = 3,
             booktabs = TRUE,
             estimate = "{estimate}{stars}",
             #             statistic = "({std.error})",
             coef_map = c("nvsum" = "Net valence (sum)",
                          "nvn" = "Number of appeals",
                          "nvsum60" = "Net valence (sum)",
                          "nvn60" = "Number of appeals",
                          "nvsum75" = "Net valence (sum)",
                          "nvn75" = "Number of appeals",
                          "nvsum105" = "Net valence (sum)",
                          "nvn105" = "Number of appeals",
                          "nvsum120" = "Net valence (sum)",
                          "nvn120" = "Number of appeals",
                          "(Intercept)" = "Intercept"),
             gof_omit = c("AIC|BIC|R2|RMSE"))

# create table for plot
modelsummary(windowmods,
             output = "markdown",
             fmt = 3,
             booktabs = TRUE,
             estimate = "{estimate}{stars}",
             #             statistic = "({std.error})",
             coef_map = c("nvsum" = "Net valence (sum)",
                          "nvn" = "Number of appeals",
                          "nvsum60" = "Net valence (sum)",
                          "nvn60" = "Number of appeals",
                          "nvsum75" = "Net valence (sum)",
                          "nvn75" = "Number of appeals",
                          "nvsum105" = "Net valence (sum)",
                          "nvn105" = "Number of appeals",
                          "nvsum120" = "Net valence (sum)",
                          "nvn120" = "Number of appeals",
                          "(Intercept)" = "Intercept"),
             gof_omit = c("AIC|BIC|R2|RMSE"))

#create data frame with broom::tidy applied to all models in windowmods list
windowmods_tidy <- map_dfr(windowmods, broom::tidy, .id = "model") %>% 
  #keep only nvsum term
  filter(str_detect(term,"nvsum")) %>%
  #make ordered factor variable by estimate
  arrange(estimate) %>%
  #make model factor variable ordered by estimate using fct_inorder
  mutate(modelfac = fct_inorder(model)) %>% 
  #create variables for 90 and 95 pct. confidence intervals
  mutate(conf.low90 = estimate - 1.645*std.error,
         conf.high90 = estimate + 1.645*std.error,
         conf.low95 = estimate - 1.96*std.error,
         conf.high95 = estimate + 1.96*std.error) %>% 
  #create dummy for whether group is our main model in paper
  mutate(mainmodel = ifelse(model == "90 days", 1, 0))

# Figure I1: create dot plot of estimate using windowmods_tidy data with 90 and 95 pct. confidence intervals as error bars
windowmods_tidy %>% 
  ggplot(aes(x = estimate, y = modelfac,color=factor(mainmodel))) +
  #add dashed vertical line at 0
  geom_vline(xintercept = 0, linetype = "dashed", alpha = .5) +
  geom_point() +
  geom_errorbarh(aes(xmin = conf.low90, xmax = conf.high90), height=0,linewidth=.5) +
  geom_errorbarh(aes(xmin = conf.low95, xmax = conf.high95), height=0,linewidth=.2) +
  labs(x = "Net valence estimate", y = "Model") +
  #manually set colors
  scale_color_manual(values = c("green2","green4")) +
  ylab("Window length") +
  #remove legend
  theme_bw() +
  theme(legend.position = "none")

ggsave("outputs/figureI1.pdf",width=6,height=2)
